// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/strings/utf_offset_string_conversions.h"

#include <stdint.h>

#include <algorithm>
#include <memory>

#include "base/logging.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversion_utils.h"

namespace base {

OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
    size_t original_length,
    size_t output_length)
    : original_offset(original_offset)
    , original_length(original_length)
    , output_length(output_length)
{
}

// static
void OffsetAdjuster::AdjustOffsets(
    const Adjustments& adjustments,
    std::vector<size_t>* offsets_for_adjustment)
{
    if (!offsets_for_adjustment || adjustments.empty())
        return;
    for (std::vector<size_t>::iterator i(offsets_for_adjustment->begin());
         i != offsets_for_adjustment->end(); ++i)
        AdjustOffset(adjustments, &(*i));
}

// static
void OffsetAdjuster::AdjustOffset(const Adjustments& adjustments,
    size_t* offset)
{
    if (*offset == string16::npos)
        return;
    int adjustment = 0;
    for (Adjustments::const_iterator i = adjustments.begin();
         i != adjustments.end(); ++i) {
        if (*offset <= i->original_offset)
            break;
        if (*offset < (i->original_offset + i->original_length)) {
            *offset = string16::npos;
            return;
        }
        adjustment += static_cast<int>(i->original_length - i->output_length);
    }
    *offset -= adjustment;
}

// static
void OffsetAdjuster::UnadjustOffsets(
    const Adjustments& adjustments,
    std::vector<size_t>* offsets_for_unadjustment)
{
    if (!offsets_for_unadjustment || adjustments.empty())
        return;
    for (std::vector<size_t>::iterator i(offsets_for_unadjustment->begin());
         i != offsets_for_unadjustment->end(); ++i)
        UnadjustOffset(adjustments, &(*i));
}

// static
void OffsetAdjuster::UnadjustOffset(const Adjustments& adjustments,
    size_t* offset)
{
    if (*offset == string16::npos)
        return;
    int adjustment = 0;
    for (Adjustments::const_iterator i = adjustments.begin();
         i != adjustments.end(); ++i) {
        if (*offset + adjustment <= i->original_offset)
            break;
        adjustment += static_cast<int>(i->original_length - i->output_length);
        if ((*offset + adjustment) < (i->original_offset + i->original_length)) {
            *offset = string16::npos;
            return;
        }
    }
    *offset += adjustment;
}

// static
void OffsetAdjuster::MergeSequentialAdjustments(
    const Adjustments& first_adjustments,
    Adjustments* adjustments_on_adjusted_string)
{
    Adjustments::iterator adjusted_iter = adjustments_on_adjusted_string->begin();
    Adjustments::const_iterator first_iter = first_adjustments.begin();
    // Simultaneously iterate over all |adjustments_on_adjusted_string| and
    // |first_adjustments|, adding adjustments to or correcting the adjustments
    // in |adjustments_on_adjusted_string| as we go.  |shift| keeps track of the
    // current number of characters collapsed by |first_adjustments| up to this
    // point.  |currently_collapsing| keeps track of the number of characters
    // collapsed by |first_adjustments| into the current |adjusted_iter|'s
    // length.  These are characters that will change |shift| as soon as we're
    // done processing the current |adjusted_iter|; they are not yet reflected in
    // |shift|.
    size_t shift = 0;
    size_t currently_collapsing = 0;
    while (adjusted_iter != adjustments_on_adjusted_string->end()) {
        if ((first_iter == first_adjustments.end()) || ((adjusted_iter->original_offset + shift + adjusted_iter->original_length) <= first_iter->original_offset)) {
            // Entire |adjusted_iter| (accounting for its shift and including its
            // whole original length) comes before |first_iter|.
            //
            // Correct the offset at |adjusted_iter| and move onto the next
            // adjustment that needs revising.
            adjusted_iter->original_offset += shift;
            shift += currently_collapsing;
            currently_collapsing = 0;
            ++adjusted_iter;
        } else if ((adjusted_iter->original_offset + shift) > first_iter->original_offset) {
            // |first_iter| comes before the |adjusted_iter| (as adjusted by |shift|).

            // It's not possible for the adjustments to overlap.  (It shouldn't
            // be possible that we have an |adjusted_iter->original_offset| that,
            // when adjusted by the computed |shift|, is in the middle of
            // |first_iter|'s output's length.  After all, that would mean the
            // current adjustment_on_adjusted_string somehow points to an offset
            // that was supposed to have been eliminated by the first set of
            // adjustments.)
            DCHECK_LE(first_iter->original_offset + first_iter->output_length,
                adjusted_iter->original_offset + shift);

            // Add the |first_adjustment_iter| to the full set of adjustments while
            // making sure |adjusted_iter| continues pointing to the same element.
            // We do this by inserting the |first_adjustment_iter| right before
            // |adjusted_iter|, then incrementing |adjusted_iter| so it points to
            // the following element.
            shift += first_iter->original_length - first_iter->output_length;
            adjusted_iter = adjustments_on_adjusted_string->insert(
                adjusted_iter, *first_iter);
            ++adjusted_iter;
            ++first_iter;
        } else {
            // The first adjustment adjusted something that then got further adjusted
            // by the second set of adjustments.  In other words, |first_iter| points
            // to something in the range covered by |adjusted_iter|'s length (after
            // accounting for |shift|).  Precisely,
            //   adjusted_iter->original_offset + shift
            //   <=
            //   first_iter->original_offset
            //   <=
            //   adjusted_iter->original_offset + shift +
            //       adjusted_iter->original_length

            // Modify the current |adjusted_iter| to include whatever collapsing
            // happened in |first_iter|, then advance to the next |first_adjustments|
            // because we dealt with the current one.
            const int collapse = static_cast<int>(first_iter->original_length) - static_cast<int>(first_iter->output_length);
            // This function does not know how to deal with a string that expands and
            // then gets modified, only strings that collapse and then get modified.
            DCHECK_GT(collapse, 0);
            adjusted_iter->original_length += collapse;
            currently_collapsing += collapse;
            ++first_iter;
        }
    }
    DCHECK_EQ(0u, currently_collapsing);
    if (first_iter != first_adjustments.end()) {
        // Only first adjustments are left.  These do not need to be modified.
        // (Their offsets are already correct with respect to the original string.)
        // Append them all.
        DCHECK(adjusted_iter == adjustments_on_adjusted_string->end());
        adjustments_on_adjusted_string->insert(
            adjustments_on_adjusted_string->end(), first_iter,
            first_adjustments.end());
    }
}

// Converts the given source Unicode character type to the given destination
// Unicode character type as a STL string. The given input buffer and size
// determine the source, and the given output STL string will be replaced by
// the result.  If non-NULL, |adjustments| is set to reflect the all the
// alterations to the string that are not one-character-to-one-character.
// It will always be sorted by increasing offset.
template <typename SrcChar, typename DestStdString>
bool ConvertUnicode(const SrcChar* src,
    size_t src_len,
    DestStdString* output,
    OffsetAdjuster::Adjustments* adjustments)
{
    if (adjustments)
        adjustments->clear();
    // ICU requires 32-bit numbers.
    bool success = true;
    int32_t src_len32 = static_cast<int32_t>(src_len);
    for (int32_t i = 0; i < src_len32; i++) {
        uint32_t code_point;
        size_t original_i = i;
        size_t chars_written = 0;
        if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
            chars_written = WriteUnicodeCharacter(code_point, output);
        } else {
            chars_written = WriteUnicodeCharacter(0xFFFD, output);
            success = false;
        }

        // Only bother writing an adjustment if this modification changed the
        // length of this character.
        // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
        // character read, not after it (so that incrementing it in the loop
        // increment will place it at the right location), so we need to account
        // for that in determining the amount that was read.
        if (adjustments && ((i - original_i + 1) != chars_written)) {
            adjustments->push_back(OffsetAdjuster::Adjustment(
                original_i, i - original_i + 1, chars_written));
        }
    }
    return success;
}

bool UTF8ToUTF16WithAdjustments(
    const char* src,
    size_t src_len,
    string16* output,
    base::OffsetAdjuster::Adjustments* adjustments)
{
    PrepareForUTF16Or32Output(src, src_len, output);
    return ConvertUnicode(src, src_len, output, adjustments);
}

string16 UTF8ToUTF16WithAdjustments(
    const base::StringPiece& utf8,
    base::OffsetAdjuster::Adjustments* adjustments)
{
    string16 result;
    UTF8ToUTF16WithAdjustments(utf8.data(), utf8.length(), &result, adjustments);
    return result;
}

string16 UTF8ToUTF16AndAdjustOffsets(
    const base::StringPiece& utf8,
    std::vector<size_t>* offsets_for_adjustment)
{
    std::for_each(offsets_for_adjustment->begin(),
        offsets_for_adjustment->end(),
        LimitOffset<base::StringPiece>(utf8.length()));
    OffsetAdjuster::Adjustments adjustments;
    string16 result = UTF8ToUTF16WithAdjustments(utf8, &adjustments);
    OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
    return result;
}

std::string UTF16ToUTF8AndAdjustOffsets(
    const base::StringPiece16& utf16,
    std::vector<size_t>* offsets_for_adjustment)
{
    std::for_each(offsets_for_adjustment->begin(),
        offsets_for_adjustment->end(),
        LimitOffset<base::StringPiece16>(utf16.length()));
    std::string result;
    PrepareForUTF8Output(utf16.data(), utf16.length(), &result);
    OffsetAdjuster::Adjustments adjustments;
    ConvertUnicode(utf16.data(), utf16.length(), &result, &adjustments);
    OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
    return result;
}

} // namespace base
